# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}
Loading required package: pacman
# Attach every package used in this document; pacman installs any that are
# missing before loading them.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr, tidyr, forcats
)

# Default ggplot2 theme and console width for the rendered output.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Install dsbox from GitHub only when it is not installed yet, so that
# rendering does not contact GitHub (or need devtools) on every run.
if (!requireNamespace("dsbox", quietly = TRUE)) {
  devtools::install_github("tidyverse/dsbox")
}
Skipping install of 'dsbox' from a github remote, the SHA1 (244ecdfe) has not changed since last install.
Use `force = TRUE` to force installation
library(dsbox)
my_edibnb_data <- edibnb

# Ridge plot of Airbnb review scores for Edinburgh neighbourhoods, with the
# neighbourhoods ordered by their median review score.
edibnb %>%
  # Listings without a rating cannot contribute to a density or to a median,
  # so drop them up front instead of letting the stat discard them.
  drop_na(review_scores_rating) %>%
  ggplot(aes(
    x = review_scores_rating,
    # fct_reorder() sorts the y-axis levels by the per-neighbourhood median.
    # It has to be applied to the row-level data: summarising first leaves
    # nothing for the density estimate to work with.
    y = fct_reorder(neighbourhood, review_scores_rating, .fun = median)
  )) +
  geom_density_ridges() +
  labs(
    x = "Review score ratings",
    y = "Neighbourhood",
    title = "AirBNB review score ratings by Edinburgh neighbourhoods",
    # sources for calculating the means of the scores, how to reorder the
    # means and the geom density plot
    caption = "Source: TidyTuesday"
  )
Picking joint bandwidth of 1.21
Warning: Removed 2177 rows containing non-finite outside the scale range
(`stat_density_ridges()`).
#view(edibnb)#glimpse(edibnb)
2 - Foreign Connected PACs
#| label: SETUP
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr, tidyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Paths of the files under data/ whose name matches "Foreign Connected PAC".
list_of_files <- dir_ls(path = "data", regexp = "Foreign Connected PAC")

# Read all of those files and row-bind them; the `year` column records the
# path of the file each row came from.
pac <- read_csv(list_of_files, id = "year")
Rows: 2394 Columns: 6
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Compa...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reduce the raw PAC table to yearly UK totals per party.
#
# Result: `pac` with one row per year and party and the columns `year`
# (numeric), `party` ("Party_D" / "Party_R") and `total_contributions`.
#
# Note: the earlier `pac %>% janitor::clean_names()` call never stored its
# result, so the original column names are the ones in use here; that no-op
# call has been removed.
pac <- pac %>%
  mutate(
    # Amounts are read as text. Strip the dollar sign and any thousands
    # separators before converting, so "," cannot turn a value into NA.
    Party_D = as.numeric(gsub("[$,]", "", Dems)),
    Party_R = as.numeric(gsub("[$,]", "", Repubs)),
    # Keep the part of "Country of Origin/Parent Company" before the first "/".
    country = str_split_fixed(`Country of Origin/Parent Company`, "/", 2)[, 1],
    # `year` initially holds the source file path: take the text before the
    # first ".", then what follows the first "-" in it, and make it numeric.
    year = as.numeric(str_remove(str_remove(year, "[.].*$"), "^[^-]*-"))
  ) %>%
  filter(country == "UK") %>%
  # select and order the columns
  select(country, year, Party_D, Party_R) %>%
  # One row per (year, party) contribution; rows without an amount are dropped.
  pivot_longer(
    cols = starts_with("Party"),
    names_to = "party",
    values_to = "amount",
    values_drop_na = TRUE
  ) %>%
  group_by(year, party) %>%
  # `.groups = "drop"` returns an ungrouped tibble and silences the
  # "grouped output" message.
  summarize(total_contributions = sum(amount), .groups = "drop")
`summarise()` has grouped output by 'year'. You can override
using the `.groups` argument.
# Line chart of yearly contributions from UK-connected PACs, one line per
# party.
pac %>%
  ggplot(aes(x = year, y = total_contributions, color = party)) +
  # A single line layer; the original drew the same lines twice.
  geom_line(linewidth = 1) +
  theme_classic() +
  # Named vectors tie each colour and label to its party code, so the mapping
  # does not depend on which levels happen to be present or on their order.
  scale_color_manual(
    values = c(Party_D = "blue", Party_R = "red"),
    labels = c(Party_D = "Democrat", Party_R = "Republican")
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 3000000, by = 1000000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma(big.mark = ",")
  ) +
  labs(
    y = "Total amount",
    x = "Year",
    title = "Contributions to US political parties from UK-connected PACs",
    caption = "OpenSecrets.org"
  )
#view(pac)#glimpse(pac)
2b - Foreign Connected PACs
#| label: SETUP
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr, tidyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Paths of the files under data/ whose name matches "Foreign Connected PAC".
list_of_files <- dir_ls(path = "data", regexp = "Foreign Connected PAC")

# Read all of those files and row-bind them; the `year` column records the
# path of the file each row came from.
pac <- read_csv(list_of_files, id = "year")
Rows: 2394 Columns: 6
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Compa...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reduce the raw PAC table to yearly German totals per party.
#
# Result: `pac` with one row per year and party and the columns `year`
# (numeric), `party` ("Party_D" / "Party_R") and `total_contributions`.
#
# Note: the earlier `pac %>% janitor::clean_names()` call never stored its
# result, so the original column names are the ones in use here; that no-op
# call has been removed.
pac <- pac %>%
  mutate(
    # Amounts are read as text. Strip the dollar sign and any thousands
    # separators before converting, so "," cannot turn a value into NA.
    Party_D = as.numeric(gsub("[$,]", "", Dems)),
    Party_R = as.numeric(gsub("[$,]", "", Repubs)),
    # Keep the part of "Country of Origin/Parent Company" before the first "/".
    country = str_split_fixed(`Country of Origin/Parent Company`, "/", 2)[, 1],
    # `year` initially holds the source file path: take the text before the
    # first ".", then what follows the first "-" in it, and make it numeric.
    year = as.numeric(str_remove(str_remove(year, "[.].*$"), "^[^-]*-"))
  ) %>%
  filter(country == "Germany") %>%
  # select and order the columns
  select(country, year, Party_D, Party_R) %>%
  # One row per (year, party) contribution; rows without an amount are dropped.
  pivot_longer(
    cols = starts_with("Party"),
    names_to = "party",
    values_to = "amount",
    values_drop_na = TRUE
  ) %>%
  group_by(year, party) %>%
  # `.groups = "drop"` returns an ungrouped tibble and silences the
  # "grouped output" message.
  summarize(total_contributions = sum(amount), .groups = "drop")
`summarise()` has grouped output by 'year'. You can override
using the `.groups` argument.
# Line chart of yearly contributions from German-connected PACs, one line per
# party.
pac %>%
  ggplot(aes(x = year, y = total_contributions, color = party)) +
  # A single line layer; the original drew the same lines twice.
  geom_line(linewidth = 1) +
  theme_classic() +
  # Named vectors tie each colour and label to its party code, so the mapping
  # does not depend on which levels happen to be present or on their order.
  scale_color_manual(
    values = c(Party_D = "blue", Party_R = "red"),
    labels = c(Party_D = "Democrat", Party_R = "Republican")
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 3000000, by = 500000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma(big.mark = ",")
  ) +
  labs(
    y = "Total amount",
    x = "Year",
    title = "Contributions to US political parties from German-connected PACs",
    caption = "OpenSecrets.org"
  )
#view(pac)#glimpse(pac)
3 - Median housing prices in the US
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Load the median housing price data from data/median-housing.csv.
housing <- read_csv(here("data", "median-housing.csv"))
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl (1): MSPUS
date (1): DATE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already parses `DATE` as a Date column, so re-parsing it with
# mdy() only produced NAs, and that result was never stored. Check the
# column type instead of re-parsing it.
stopifnot(inherits(housing$DATE, "Date"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
DATE MSPUS date
<date> <dbl> <date>
1 1963-01-01 17800 NA
2 1963-04-01 18000 NA
3 1963-07-01 17900 NA
4 1963-10-01 18500 NA
5 1964-01-01 18500 NA
6 1964-04-01 18900 NA
7 1964-07-01 18900 NA
8 1964-10-01 19400 NA
9 1965-01-01 20200 NA
10 1965-04-01 19800 NA
# ℹ 224 more rows
# Give the raw columns readable names and derive calendar parts from the
# date.
housing <- housing %>%
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    year = lubridate::year(date),   # numeric year, as before
    # Month number; splitting the date as text used to leave "MM-DD" here.
    month = lubridate::month(date)
  )

# Median sales price over time. Mapping x to the Date column keeps the line
# smooth, and scale_x_date() labels every fifth year.
housing %>%
  ggplot(aes(x = date, y = price)) +
  geom_line(color = "blue") +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(
    breaks = seq(from = 0, to = 440000, by = 40000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma(big.mark = ",")
  ) +
  labs(
    x = "",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = "Sources: Census;HUD"
  ) +
  # Left-align title and subtitle with the whole plot rather than the panel.
  # This replaces hand-tuned negative hjust values, which depend on the
  # figure size.
  theme(plot.title.position = "plot")
#glimpse(housing)#view(housing)
3b - Median housing prices in the US: RECESSION LINES
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Load the median housing price data from data/median-housing.csv.
housing <- read_csv(here("data", "median-housing.csv"))
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl (1): MSPUS
date (1): DATE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already parses `DATE` as a Date column, so re-parsing it with
# mdy() only produced NAs, and that result was never stored. Check the
# column type instead of re-parsing it.
stopifnot(inherits(housing$DATE, "Date"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
DATE MSPUS date
<date> <dbl> <date>
1 1963-01-01 17800 NA
2 1963-04-01 18000 NA
3 1963-07-01 17900 NA
4 1963-10-01 18500 NA
5 1964-01-01 18500 NA
6 1964-04-01 18900 NA
7 1964-07-01 18900 NA
8 1964-10-01 19400 NA
9 1965-01-01 20200 NA
10 1965-04-01 19800 NA
# ℹ 224 more rows
# Give the raw columns readable names and derive calendar parts from the
# date.
housing <- housing %>%
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    year = lubridate::year(date),   # numeric year, as before
    # Month number; splitting the date as text used to leave "MM-DD" here.
    month = lubridate::month(date)
  )

# U.S. recession periods (peak month to trough month) used for the shaded
# bands that the caption promises.
# NOTE(review): dates entered by hand from the NBER business-cycle
# chronology -- confirm them against the official table.
recessions <- tibble(
  start = as.Date(c(
    "1969-12-01", "1973-11-01", "1980-01-01", "1981-07-01",
    "1990-07-01", "2001-03-01", "2007-12-01", "2020-02-01"
  )),
  end = as.Date(c(
    "1970-11-01", "1975-03-01", "1980-07-01", "1982-11-01",
    "1991-03-01", "2001-11-01", "2009-06-01", "2020-04-01"
  ))
)

# Median sales price over time with recession periods shaded in grey.
ggplot(housing, aes(x = date, y = price)) +
  # Bands go first so the price line is drawn on top of them. They span the
  # full height of the panel (-Inf to Inf) and do not use the plot-level
  # aesthetics.
  geom_rect(
    data = recessions,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "grey70",
    alpha = 0.5
  ) +
  geom_line(color = "blue") +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(
    breaks = seq(from = 0, to = 440000, by = 40000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma(big.mark = ",")
  ) +
  labs(
    x = "",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = "Shaded areas indicate U.S. recessions \nSources: Census; HUD"
  ) +
  # Left-align title and subtitle with the whole plot rather than the panel.
  # This replaces hand-tuned negative hjust values, which depend on the
  # figure size.
  theme(plot.title.position = "plot")
3C - Subset of Median Housing 2019 - 2020
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Load the median housing price data from data/median-housing.csv.
housing <- read_csv(here("data", "median-housing.csv"))
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl (1): MSPUS
date (1): DATE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already parses `DATE` as a Date column, so re-parsing it with
# mdy() only produced NAs, and that result was never stored. Check the
# column type instead of re-parsing it.
stopifnot(inherits(housing$DATE, "Date"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
DATE MSPUS date
<date> <dbl> <date>
1 1963-01-01 17800 NA
2 1963-04-01 18000 NA
3 1963-07-01 17900 NA
4 1963-10-01 18500 NA
5 1964-01-01 18500 NA
6 1964-04-01 18900 NA
7 1964-07-01 18900 NA
8 1964-10-01 19400 NA
9 1965-01-01 20200 NA
10 1965-04-01 19800 NA
# ℹ 224 more rows
# Give the raw columns readable names and derive calendar parts from the
# date.
housing <- housing %>%
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    year = lubridate::year(date),   # numeric year, as before
    # Month number; splitting the date as text used to leave "MM-DD" here.
    month = lubridate::month(date)
  )

# Quarterly median sales price for 2019 and 2020 only.
housing %>%
  filter(date > ymd(20181231), date < ymd(20210101)) %>%
  ggplot(aes(x = date, y = price)) +
  geom_line(color = "blue") +
  # Shape 21 is the circle that honours `fill`, giving a white centre with a
  # blue outline. Shape 1 is hollow and ignores `fill`.
  geom_point(color = "blue", fill = "white", shape = 21) +
  # One tick at the start of each quarter, labelled Q1..Q4. The axis is a
  # date axis, so the labels come from scale_x_date(), not scale_x_discrete().
  scale_x_date(
    breaks = seq(ymd(20190101), ymd(20201001), by = "3 months"),
    labels = function(d) paste0("Q", lubridate::quarter(d))
  ) +
  scale_y_continuous(
    breaks = seq(from = 280000, to = 380000, by = 20000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma(big.mark = ",")
  ) +
  labs(
    x = "2019 2020",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = ""
  ) +
  # Left-align title and subtitle with the whole plot rather than the panel.
  # This replaces hand-tuned negative hjust values, which depend on the
  # figure size.
  theme(plot.title.position = "plot")
4 - Expect More. Plot More.
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

library(ggplot2)

# Three equal-valued rows, one per ring of the logo.
# Columns are named with `=`. Using `<-` inside data.frame() created global
# variables and auto-generated column names such as
# `subject....c..A....B....C..`, which the plot then had to reference.
df <- data.frame(
  subject = c("A", "B", "C"),
  value = c(100, 100, 100)
)

# Bars drawn in polar coordinates around the y axis render as concentric
# rings, filled red / white / red.
ggplot(df, aes(x = subject, y = value, fill = subject)) +
  scale_fill_manual(values = c("red", "white", "red")) +
  geom_col() +
  scale_x_discrete(limits = c("A", "B", "C")) +
  coord_polar("y") +
  # removed labels and background
  theme_void() +
  labs(
    x = "",
    y = "",
    caption = "TARGET"
  ) +
  theme(legend.position = "none") +
  theme(
    plot.caption = element_text(
      color = "red", size = 36, face = "bold", hjust = 0.5
    )
  )
5 - Mirror, mirror on the wall, who’s the ugliest of them all?
# SETUP for THEMES
# Attach pacman if it is installed; otherwise install it from CRAN.
if (!require("pacman")) {
  install.packages("pacman")
}

# Packages used in this section.
pacman::p_load(here)
pacman::p_load(
  tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro,
  gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme and console width.
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)

# Figure defaults for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Print the penguins data set shipped with palmerpenguins.
palmerpenguins::penguins
# A tibble: 344 × 8
species island bill_length_mm bill_depth_mm flipper_length_mm
<fct> <fct> <dbl> <dbl> <int>
1 Adelie Torger… 39.1 18.7 181
2 Adelie Torger… 39.5 17.4 186
3 Adelie Torger… 40.3 18 195
4 Adelie Torger… NA NA NA
5 Adelie Torger… 36.7 19.3 193
6 Adelie Torger… 39.3 20.6 190
7 Adelie Torger… 38.9 17.8 181
8 Adelie Torger… 39.2 19.6 195
9 Adelie Torger… 34.1 18.1 193
10 Adelie Torger… 42 20.2 190
# ℹ 334 more rows
# ℹ 3 more variables: body_mass_g <int>, sex <fct>, year <int>
# clean_names() returns a new data frame rather than modifying its input.
# Store the result so the standardised column names are the ones used from
# here on, then print it as before.
penguins <- penguins %>%
  janitor::clean_names()
penguins
# A tibble: 344 × 8
species island bill_length_mm bill_depth_mm flipper_length_mm
<fct> <fct> <dbl> <dbl> <int>
1 Adelie Torger… 39.1 18.7 181
2 Adelie Torger… 39.5 17.4 186
3 Adelie Torger… 40.3 18 195
4 Adelie Torger… NA NA NA
5 Adelie Torger… 36.7 19.3 193
6 Adelie Torger… 39.3 20.6 190
7 Adelie Torger… 38.9 17.8 181
8 Adelie Torger… 39.2 19.6 195
9 Adelie Torger… 34.1 18.1 193
10 Adelie Torger… 42 20.2 190
# ℹ 334 more rows
# ℹ 3 more variables: body_mass_g <int>, sex <fct>, year <int>
# Citations for: how to remove the dollar sign, how to split a column into two, how to pivot longer, filtering, summarizing by year, and adjusting the labels for the legend.
# Citations for: how to remove the dollar sign, how to split a column into two, how to pivot longer, left-justifying labels, formatting numbers, and correcting the x-axis labels (thank you to my colleague @WesScott).
# Citations for: how to remove the dollar sign, how to split a column into two, how to pivot longer, left-justifying labels and scale_x_date (also thanks to my colleague @WesScott), and formatting numbers.